{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 基础爬虫类(框架)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import re\n",
    "\n",
    "class MyCrawler:\n",
    "    def __init__(self, filename):\n",
    "        self.filename = filename\n",
    "        self.headers =  {\n",
    "            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',\n",
    "        }\n",
    "    \n",
    "    def download(self, url):\n",
    "        r = requests.get(url, headers=self.headers)\n",
    "        return r.text\n",
    "    \n",
    "    def extract(self, content, pattern):\n",
    "        result = re.findall(pattern, content)\n",
    "        return result\n",
    "    \n",
    "    def save(self, info):\n",
    "        with open(self.filename, 'a', encoding='utf-8') as f:\n",
    "            for item in info:\n",
    "                f.write('|||'.join(item) + '\\n')\n",
    "    \n",
    "    def crawl(self, url, pattern, headers=None):\n",
    "        if headers:\n",
    "            self.headers.update(headers)\n",
    "        content = self.download(url)\n",
    "        info = self.extract(content, pattern)\n",
    "        self.save(info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[1;31mDocstring:\u001b[0m\n",
       "D.update([E, ]**F) -> None.  Update D from dict/iterable E and F.\n",
       "If E is present and has a .keys() method, then does:  for k in E: D[k] = E[k]\n",
       "If E is present and lacks a .keys() method, then does:  for k, v in E: D[k] = v\n",
       "In either case, this is followed by: for k in F:  D[k] = F[k]\n",
       "\u001b[1;31mType:\u001b[0m      method_descriptor\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scratch lookup (IPython help syntax) of dict.update, the call crawl() uses on self.headers.\n",
    "dict.update?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 对豆瓣进行测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Crawler instance whose records are appended to douban_book.txt.\n",
    "b_crawler = MyCrawler('douban_book.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Crawl the Douban book tag page (the URL-encoded tag decodes to the Chinese for 'neural network').\n",
    "# Groups captured per book: cover image URL, book URL, title, publication info, raw rating HTML.\n",
    "# Note: save() opens the file in append mode, so re-running this cell duplicates the records.\n",
    "b_crawler.crawl(\n",
    "    'https://book.douban.com/tag/%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C',\n",
    "    # Raw string so the regex backslash escapes are not treated as (invalid) Python string escapes.\n",
    "    r'src=\"(.*?\\d+.jpg)\"[\\S\\s]*?<a\\shref=\"(https:\\/\\/book.douban.com/subject\\/\\d+/)\"\\stitle=\"(.*?)\"[\\S\\s]*?<div class=\"pub\">\\s*(.*?)\\s*<\\/div>[\\S\\s]*?<div class=\"star\\sclearfix\">\\s*([\\S\\s]*?)\\s*<\\/div>',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MyDoubanCrawler(MyCrawler):\n",
    "    \"\"\"Crawler for Douban book listings that splits the raw rating HTML into separate fields.\"\"\"\n",
    "\n",
    "    def extract(self, content, pattern_main, pattern_star):\n",
    "        \"\"\"Extract book records and replace the raw rating HTML with the parsed values.\n",
    "\n",
    "        Args:\n",
    "            content: HTML text of the listing page.\n",
    "            pattern_main: Regex whose fifth group (index 4) captures the raw\n",
    "                rating HTML of each book.\n",
    "            pattern_star: Regex applied to that rating HTML; its groups are\n",
    "                appended to the record in place of the raw HTML.\n",
    "\n",
    "        Returns:\n",
    "            A list of lists: the groups of ``pattern_main`` without the raw rating\n",
    "            HTML, followed by the groups of ``pattern_star``, or by three ``'0'``\n",
    "            placeholders when no rating could be parsed.\n",
    "        \"\"\"\n",
    "        records = []\n",
    "        for match in re.findall(pattern_main, content):\n",
    "            record = list(match)\n",
    "            star_html = record.pop(4)\n",
    "            # Only rating blocks that mention 'allstar' are parsed with pattern_star.\n",
    "            found = re.findall(pattern_star, star_html) if 'allstar' in star_html else []\n",
    "            # Use the placeholders when pattern_star finds nothing, instead of\n",
    "            # failing with IndexError on found[0].\n",
    "            stars = found[0] if found else ['0', '0', '0']\n",
    "            if isinstance(stars, str):\n",
    "                # A single-group pattern yields a plain string; keep it as one field.\n",
    "                stars = [stars]\n",
    "            record.extend(stars)\n",
    "            records.append(record)\n",
    "        return records\n",
    "\n",
    "    def crawl(self, url, pattern_main, pattern_star, headers=None):\n",
    "        \"\"\"Download ``url``, extract the book records and append them to the output file.\n",
    "\n",
    "        Args:\n",
    "            url: Listing page to crawl.\n",
    "            pattern_main: Regex for one book entry (see ``extract``).\n",
    "            pattern_star: Regex for the rating fields (see ``extract``).\n",
    "            headers: Optional extra request headers. They are merged into\n",
    "                ``self.headers`` and therefore also apply to later calls.\n",
    "        \"\"\"\n",
    "        if headers:\n",
    "            self.headers.update(headers)\n",
    "        content = self.download(url)\n",
    "        info = self.extract(content, pattern_main, pattern_star)\n",
    "        self.save(info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Douban-specific crawler whose records are appended to douban_book_new.txt.\n",
    "b_douban_crawler = MyDoubanCrawler('douban_book_new.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Crawl the same tag page, this time splitting the rating HTML into separate fields.\n",
    "# Both patterns are raw strings so the regex backslash escapes are not treated as\n",
    "# (invalid) Python string escapes.\n",
    "b_douban_crawler.crawl(\n",
    "    'https://book.douban.com/tag/%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C',\n",
    "    # Main pattern: cover image URL, book URL, title, publication info, raw rating HTML.\n",
    "    r'src=\"(.*?\\d+.jpg)\"[\\S\\s]*?<a\\shref=\"(https:\\/\\/book.douban.com/subject\\/\\d+/)\"\\stitle=\"(.*?)\"[\\S\\s]*?<div class=\"pub\">\\s*(.*?)\\s*<\\/div>[\\S\\s]*?<div class=\"star\\sclearfix\">\\s*([\\S\\s]*?)\\s*<\\/div>',\n",
    "    # Star pattern: digits after 'allstar', the rating_nums text, and the number after '('.\n",
    "    r'allstar(\\d+)\"[\\S\\s]*?rating_nums\">([^<]*?)<\\/span>[\\S\\s]*?\\((\\d+)'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://img1.doubanio.com/view/subject/s/public/s33631858.jpg|||https://book.douban.com/subject/35044046/|||绁炵粡缃戠粶涓庢繁搴﹀�︿範|||閭遍敗楣� / 鏈烘�板伐涓氬嚭鐗堢ぞ / 2020-4-10 / 149.00鍏億||45|||9.3|||149\n",
      "https://img9.doubanio.com/view/subject/s/public/s29738046.jpg|||https://book.douban.com/subject/30192800/|||Python绁炵粡缃戠粶缂栫▼|||[鑻盷濉旈噷鍏嬄锋媺甯屽痉锛圱ariq Rashid锛� / 鏋楄祼 / 浜烘皯閭�鐢靛嚭鐗堢ぞ / 2018-4 / 69.00鍏億||45|||9.2|||453\n",
      "https://img1.doubanio.com/view/subject/s/public/s29839337.jpg|||https://book.douban.com/subject/30293801/|||Python娣卞害瀛︿範|||[缇嶿 寮楁湕绱㈢摝鈥㈣倴鑾� / 寮犱寒 / 浜烘皯閭�鐢靛嚭鐗堢ぞ / 2018-8 / 119.00鍏億||50|||9.5|||570\n",
      "https://img9.doubanio.com/view/subject/s/public/s29815955.jpg|||https://book.douban.com/subject/30270959/|||娣卞害瀛︿範鍏ラ棬|||[ 鏃ワ冀  鏂嬭棨搴锋瘏 / 闄嗗畤鏉� / 浜烘皯閭�鐢靛嚭鐗堢ぞ / 2018-7 / 59.00鍏億||45|||9.4|||506\n",
      "https://img1.doubanio.com/view/subject/s/public/s32295077.jpg|||https://book.douban.com/subject/33414479/|||娣卞害瀛︿範鐨勬暟瀛�|||[鏃�]娑屼簳鑹�骞搞�乕鏃�]娑屼簳璐炵編 / 鏉ㄧ憺榫� / 浜烘皯閭�鐢靛嚭鐗堢ぞ / 2019-4 / 69.00鍏億||45|||9.0|||111\n",
      "https://img1.doubanio.com/view/subject/s/public/s33557648.jpg|||https://book.douban.com/subject/34941715/|||鏁板瓧鎬濈淮|||[钁�] 闃挎灄澶氣�㈠ゥ鍒╃淮鎷� / 鑳″皬閿� / 涓�淇″嚭鐗堢ぞ / 2020-1-1 / 69.00|||45|||8.6|||12\n",
      "https://img9.doubanio.com/view/subject/s/public/s33545334.jpg|||https://book.douban.com/subject/34927262/|||娣卞叆娴呭嚭鍥剧�炵粡缃戠粶锛欸NN鍘熺悊瑙ｆ瀽|||鍒樺繝闆ㄣ��鏉庡溅闇栥��鍛ㄦ磱銆�钁� / 鏈烘�板伐涓氬嚭鐗堢ぞ / 2019-12-25 / 89鍏億||25|||5.2|||38\n",
      "https://img9.doubanio.com/view/subject/s/public/s28855545.jpg|||https://book.douban.com/subject/26727997/|||Neural Networks and Deep Learning|||Michael Nielsen / 2016-1|||45|||9.4|||202\n",
      "https://img1.doubanio.com/view/subject/s/public/s29936638.jpg|||https://book.douban.com/subject/30236893/|||绁炵粡缃戠粶璁捐�★紙鍘熶功绗�2鐗堬級|||Martin T. Hagan銆丠oward B. Demuth銆丮ark H. Beale / 绔犳瘏 / 鏈烘�板伐涓氬嚭鐗堢ぞ / 2017-11 / 99.00鍏億||45|||8.8|||15\n",
      "https://img3.doubanio.com/view/subject/s/public/s4410591.jpg|||https://book.douban.com/subject/4146246/|||绁炵粡缃戠粶鍦ㄥ簲鐢ㄧ�戝�﹀拰宸ョ▼涓�鐨勫簲鐢▅||钀ㄩ┈鎷夎緵鑽� / 2010-1 / 88.00鍏億||0|||0|||0\n",
      "https://img3.doubanio.com/view/subject/s/public/s29249951.jpg|||https://book.douban.com/subject/26945232/|||Make Your Own Neural Network|||Tariq Rashid / CreateSpace Independent Publishing Platform / 2016-3-31 / USD 45.00|||50|||9.6|||54\n",
      "https://img9.doubanio.com/view/subject/s/public/s29877486.jpg|||https://book.douban.com/subject/30333961/|||鍥捐В娣卞害瀛︿範涓庣�炵粡缃戠粶锛氫粠寮犻噺鍒癟ensorFlow瀹炵幇|||寮犲钩 / 鐢靛瓙宸ヤ笟鍑虹増绀� / 2018-10 / 79.00鍏億||0|||0|||0\n",
      "https://img3.doubanio.com/view/subject/s/public/s28070570.jpg|||https://book.douban.com/subject/26388161/|||MATLAB绁炵粡缃戠粶43涓�妗堜緥鍒嗘瀽|||鐜嬪皬宸濄�佸彶宄般�侀儊纾娿�佹潕娲� / 鍖椾含鑸�绌鸿埅澶╁ぇ瀛﹀嚭鐗堢ぞ / 2013-8-1 / CNY 48.00|||45|||8.5|||18\n",
      "https://img3.doubanio.com/view/subject/s/public/s3898822.jpg|||https://book.douban.com/subject/2584657/|||Neural Networks and Learning Machines|||Simon O. Haykin / Pearson / 2008-11-28 / USD 252.40|||45|||8.7|||53\n",
      "https://img9.doubanio.com/view/subject/s/public/s1695376.jpg|||https://book.douban.com/subject/1138922/|||绁炵粡缃戠粶鍘熺悊(鍘熶功绗�2鐗�)|||Simon Haykin / 鍙朵笘浼熴�佸彶蹇犳�� / 鏈烘�板伐涓氬嚭鐗堢ぞ / 2004-1 / 69.00鍏億||35|||7.3|||54\n",
      "https://img9.doubanio.com/view/subject/s/public/s28342396.jpg|||https://book.douban.com/subject/26666358/|||杩炴帴缁勶細閫犲氨鐙�涓�鏃犱簩鐨勪綘|||[缇嶿 鎵跨幇宄� / 瀛欏ぉ榻� / 娓呭崕澶у�﹀嚭鐗堢ぞ / 2016-1 / 45|||45|||8.5|||285\n",
      "https://img1.doubanio.com/view/subject/s/public/s6458908.jpg|||https://book.douban.com/subject/3890040/|||绁炵粡缃戠粶鎺у埗|||寰愪附濞� / 2009-7 / 28.00鍏億||0|||0|||0\n",
      "https://img1.doubanio.com/view/subject/s/public/s28107307.jpg|||https://book.douban.com/subject/26422529/|||Neural Networks and Statistical Learning|||Ke-Lin Du銆丮. N. S. Swamy / Springer / 2013-12-7 / USD 129.00|||0|||0|||0\n",
      "https://img9.doubanio.com/view/subject/s/public/s1663944.jpg|||https://book.douban.com/subject/1159821/|||鎰忚瘑鐨勫畤瀹檤||[缇嶿 鏉版媺灏斿痉路鍩冨痉灏旀浖銆乕缇嶿 鏈卞埄娆�路鎵樿�哄凹 / 椤惧嚒鍙� / 涓婃捣绉戝�︽妧鏈�鍑虹増绀� / 2004-1 / 27.00鍏億||40|||8.3|||192\n",
      "https://img1.doubanio.com/view/subject/s/public/s6517517.jpg|||https://book.douban.com/subject/6529821/|||Unsupervised Learning|||A Bradford Book / 1999-6-11 / USD 40.00|||0|||0|||0\n"
     ]
    }
   ],
   "source": [
    "# Read the file explicitly as UTF-8 (the encoding save() writes); the shell `cat` output\n",
    "# saved with this cell came out as mojibake, and `cat` is not available on every platform.\n",
    "with open('douban_book_new.txt', encoding='utf-8') as f:\n",
    "    print(f.read())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scratch: sample list for checking in-place deletion with del (as used in MyDoubanCrawler.extract).\n",
    "a = [1,2,3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scratch: remove the last element in place.\n",
    "del a[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 2]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scratch: display the list after the deletion.\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scratch: inspect list.extend, the method MyDoubanCrawler.extract uses to append the rating fields.\n",
    "[1].extend"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://img1.doubanio.com/view/subject/s/public/s33631858.jpg|||https://book.douban.com/subject/35044046/|||神经网络与深度学习|||邱锡鹏 / 机械工业出版社 / 2020-4-10 / 149.00元|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">9.3</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (149人评价)\n",
      "    </span>\n",
      "https://img9.doubanio.com/view/subject/s/public/s29738046.jpg|||https://book.douban.com/subject/30192800/|||Python神经网络编程|||[英]塔里克·拉希德（Tariq Rashid） / 林赐 / 人民邮电出版社 / 2018-4 / 69.00元|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">9.2</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (452人评价)\n",
      "    </span>\n",
      "https://img1.doubanio.com/view/subject/s/public/s29839337.jpg|||https://book.douban.com/subject/30293801/|||Python深度学习|||[美] 弗朗索瓦•肖莱 / 张亮 / 人民邮电出版社 / 2018-8 / 119.00元|||<span class=\"allstar50\"></span>\n",
      "        <span class=\"rating_nums\">9.5</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (570人评价)\n",
      "    </span>\n",
      "https://img9.doubanio.com/view/subject/s/public/s29815955.jpg|||https://book.douban.com/subject/30270959/|||深度学习入门|||[ 日］  斋藤康毅 / 陆宇杰 / 人民邮电出版社 / 2018-7 / 59.00元|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">9.4</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (506人评价)\n",
      "    </span>\n",
      "https://img1.doubanio.com/view/subject/s/public/s32295077.jpg|||https://book.douban.com/subject/33414479/|||深度学习的数学|||[日]涌井良幸、[日]涌井贞美 / 杨瑞龙 / 人民邮电出版社 / 2019-4 / 69.00元|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">9.0</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (111人评价)\n",
      "    </span>\n",
      "https://img1.doubanio.com/view/subject/s/public/s33557648.jpg|||https://book.douban.com/subject/34941715/|||数字思维|||[葡] 阿林多•奥利维拉 / 胡小锐 / 中信出版社 / 2020-1-1 / 69.00|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">8.6</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (12人评价)\n",
      "    </span>\n",
      "https://img9.doubanio.com/view/subject/s/public/s33545334.jpg|||https://book.douban.com/subject/34927262/|||深入浅出图神经网络：GNN原理解析|||刘忠雨　李彦霖　周洋　著 / 机械工业出版社 / 2019-12-25 / 89元|||<span class=\"allstar25\"></span>\n",
      "        <span class=\"rating_nums\">5.2</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (38人评价)\n",
      "    </span>\n",
      "https://img9.doubanio.com/view/subject/s/public/s28855545.jpg|||https://book.douban.com/subject/26727997/|||Neural Networks and Deep Learning|||Michael Nielsen / 2016-1|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">9.4</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (202人评价)\n",
      "    </span>\n",
      "https://img1.doubanio.com/view/subject/s/public/s29936638.jpg|||https://book.douban.com/subject/30236893/|||神经网络设计（原书第2版）|||Martin T. Hagan、Howard B. Demuth、Mark H. Beale / 章毅 / 机械工业出版社 / 2017-11 / 99.00元|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">8.8</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (15人评价)\n",
      "    </span>\n",
      "https://img3.doubanio.com/view/subject/s/public/s4410591.jpg|||https://book.douban.com/subject/4146246/|||神经网络在应用科学和工程中的应用|||萨马拉辛荷 / 2010-1 / 88.00元|||<span class=\"pl\">\n",
      "        (少于10人评价)\n",
      "    </span>\n",
      "https://img3.doubanio.com/view/subject/s/public/s29249951.jpg|||https://book.douban.com/subject/26945232/|||Make Your Own Neural Network|||Tariq Rashid / CreateSpace Independent Publishing Platform / 2016-3-31 / USD 45.00|||<span class=\"allstar50\"></span>\n",
      "        <span class=\"rating_nums\">9.6</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (54人评价)\n",
      "    </span>\n",
      "https://img9.doubanio.com/view/subject/s/public/s29877486.jpg|||https://book.douban.com/subject/30333961/|||图解深度学习与神经网络：从张量到TensorFlow实现|||张平 / 电子工业出版社 / 2018-10 / 79.00元|||<span class=\"pl\">\n",
      "        (少于10人评价)\n",
      "    </span>\n",
      "https://img3.doubanio.com/view/subject/s/public/s28070570.jpg|||https://book.douban.com/subject/26388161/|||MATLAB神经网络43个案例分析|||王小川、史峰、郁磊、李洋 / 北京航空航天大学出版社 / 2013-8-1 / CNY 48.00|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">8.5</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (18人评价)\n",
      "    </span>\n",
      "https://img3.doubanio.com/view/subject/s/public/s3898822.jpg|||https://book.douban.com/subject/2584657/|||Neural Networks and Learning Machines|||Simon O. Haykin / Pearson / 2008-11-28 / USD 252.40|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">8.7</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (53人评价)\n",
      "    </span>\n",
      "https://img9.doubanio.com/view/subject/s/public/s1695376.jpg|||https://book.douban.com/subject/1138922/|||神经网络原理(原书第2版)|||Simon Haykin / 叶世伟、史忠植 / 机械工业出版社 / 2004-1 / 69.00元|||<span class=\"allstar35\"></span>\n",
      "        <span class=\"rating_nums\">7.3</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (54人评价)\n",
      "    </span>\n",
      "https://img9.doubanio.com/view/subject/s/public/s28342396.jpg|||https://book.douban.com/subject/26666358/|||连接组：造就独一无二的你|||[美] 承现峻 / 孙天齐 / 清华大学出版社 / 2016-1 / 45|||<span class=\"allstar45\"></span>\n",
      "        <span class=\"rating_nums\">8.5</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (285人评价)\n",
      "    </span>\n",
      "https://img1.doubanio.com/view/subject/s/public/s6458908.jpg|||https://book.douban.com/subject/3890040/|||神经网络控制|||徐丽娜 / 2009-7 / 28.00元|||<span class=\"pl\">\n",
      "        (少于10人评价)\n",
      "    </span>\n",
      "https://img1.doubanio.com/view/subject/s/public/s28107307.jpg|||https://book.douban.com/subject/26422529/|||Neural Networks and Statistical Learning|||Ke-Lin Du、M. N. S. Swamy / Springer / 2013-12-7 / USD 129.00|||<span class=\"pl\">\n",
      "        (少于10人评价)\n",
      "    </span>\n",
      "https://img9.doubanio.com/view/subject/s/public/s1663944.jpg|||https://book.douban.com/subject/1159821/|||意识的宇宙|||[美] 杰拉尔德·埃德尔曼、[美] 朱利欧·托诺尼 / 顾凡及 / 上海科学技术出版社 / 2004-1 / 27.00元|||<span class=\"allstar40\"></span>\n",
      "        <span class=\"rating_nums\">8.3</span>\n",
      "\n",
      "    <span class=\"pl\">\n",
      "        (192人评价)\n",
      "    </span>\n",
      "https://img1.doubanio.com/view/subject/s/public/s6517517.jpg|||https://book.douban.com/subject/6529821/|||Unsupervised Learning|||A Bradford Book / 1999-6-11 / USD 40.00|||<span class=\"pl\">\n",
      "        (少于10人评价)\n",
      "    </span>\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Dump what the base crawler appended to douban_book.txt (the last field is still raw rating HTML).\n",
    "with open('douban_book.txt', encoding='utf-8') as book_file:\n",
    "    contents = book_file.read()\n",
    "print(contents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "\n",
    "# Send a desktop-Chrome User-Agent instead of the requests default.\n",
    "headers = {\n",
    "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',\n",
    "}\n",
    "\n",
    "# Fetch the tag page once; the cells below inspect response.text and run the regex on it.\n",
    "# The timeout keeps a stalled connection from hanging the kernel, and raise_for_status\n",
    "# surfaces an HTTP error here rather than as an empty regex result later.\n",
    "response = requests.get('https://book.douban.com/tag/%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C', headers=headers, timeout=10)\n",
    "response.raise_for_status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "48038"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of characters in the downloaded page.\n",
    "len(response.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check that a title known to be on the listing is present in the downloaded HTML.\n",
    "assert 'Neural Networks and Deep Learning' in response.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('https://img1.doubanio.com/view/subject/s/public/s33631858.jpg',\n",
       "  'https://book.douban.com/subject/35044046/',\n",
       "  '神经网络与深度学习',\n",
       "  '邱锡鹏 / 机械工业出版社 / 2020-4-10 / 149.00元',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">9.3</span>\\n\\n    <span class=\"pl\">\\n        (149人评价)\\n    </span>'),\n",
       " ('https://img9.doubanio.com/view/subject/s/public/s29738046.jpg',\n",
       "  'https://book.douban.com/subject/30192800/',\n",
       "  'Python神经网络编程',\n",
       "  '[英]塔里克·拉希德（Tariq Rashid） / 林赐 / 人民邮电出版社 / 2018-4 / 69.00元',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">9.2</span>\\n\\n    <span class=\"pl\">\\n        (450人评价)\\n    </span>'),\n",
       " ('https://img1.doubanio.com/view/subject/s/public/s29839337.jpg',\n",
       "  'https://book.douban.com/subject/30293801/',\n",
       "  'Python深度学习',\n",
       "  '[美] 弗朗索瓦•肖莱 / 张亮 / 人民邮电出版社 / 2018-8 / 119.00元',\n",
       "  '<span class=\"allstar50\"></span>\\n        <span class=\"rating_nums\">9.5</span>\\n\\n    <span class=\"pl\">\\n        (569人评价)\\n    </span>'),\n",
       " ('https://img9.doubanio.com/view/subject/s/public/s29815955.jpg',\n",
       "  'https://book.douban.com/subject/30270959/',\n",
       "  '深度学习入门',\n",
       "  '[ 日］  斋藤康毅 / 陆宇杰 / 人民邮电出版社 / 2018-7 / 59.00元',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">9.4</span>\\n\\n    <span class=\"pl\">\\n        (505人评价)\\n    </span>'),\n",
       " ('https://img1.doubanio.com/view/subject/s/public/s32295077.jpg',\n",
       "  'https://book.douban.com/subject/33414479/',\n",
       "  '深度学习的数学',\n",
       "  '[日]涌井良幸、[日]涌井贞美 / 杨瑞龙 / 人民邮电出版社 / 2019-4 / 69.00元',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">9.0</span>\\n\\n    <span class=\"pl\">\\n        (110人评价)\\n    </span>'),\n",
       " ('https://img1.doubanio.com/view/subject/s/public/s33557648.jpg',\n",
       "  'https://book.douban.com/subject/34941715/',\n",
       "  '数字思维',\n",
       "  '[葡] 阿林多•奥利维拉 / 胡小锐 / 中信出版社 / 2020-1-1 / 69.00',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">8.6</span>\\n\\n    <span class=\"pl\">\\n        (12人评价)\\n    </span>'),\n",
       " ('https://img9.doubanio.com/view/subject/s/public/s33545334.jpg',\n",
       "  'https://book.douban.com/subject/34927262/',\n",
       "  '深入浅出图神经网络：GNN原理解析',\n",
       "  '刘忠雨\\u3000李彦霖\\u3000周洋\\u3000著 / 机械工业出版社 / 2019-12-25 / 89元',\n",
       "  '<span class=\"allstar25\"></span>\\n        <span class=\"rating_nums\">5.2</span>\\n\\n    <span class=\"pl\">\\n        (38人评价)\\n    </span>'),\n",
       " ('https://img9.doubanio.com/view/subject/s/public/s28855545.jpg',\n",
       "  'https://book.douban.com/subject/26727997/',\n",
       "  'Neural Networks and Deep Learning',\n",
       "  'Michael Nielsen / 2016-1',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">9.4</span>\\n\\n    <span class=\"pl\">\\n        (202人评价)\\n    </span>'),\n",
       " ('https://img1.doubanio.com/view/subject/s/public/s29936638.jpg',\n",
       "  'https://book.douban.com/subject/30236893/',\n",
       "  '神经网络设计（原书第2版）',\n",
       "  'Martin T. Hagan、Howard B. Demuth、Mark H. Beale / 章毅 / 机械工业出版社 / 2017-11 / 99.00元',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">8.8</span>\\n\\n    <span class=\"pl\">\\n        (15人评价)\\n    </span>'),\n",
       " ('https://img3.doubanio.com/view/subject/s/public/s4410591.jpg',\n",
       "  'https://book.douban.com/subject/4146246/',\n",
       "  '神经网络在应用科学和工程中的应用',\n",
       "  '萨马拉辛荷 / 2010-1 / 88.00元',\n",
       "  '<span class=\"pl\">\\n        (少于10人评价)\\n    </span>'),\n",
       " ('https://img3.doubanio.com/view/subject/s/public/s29249951.jpg',\n",
       "  'https://book.douban.com/subject/26945232/',\n",
       "  'Make Your Own Neural Network',\n",
       "  'Tariq Rashid / CreateSpace Independent Publishing Platform / 2016-3-31 / USD 45.00',\n",
       "  '<span class=\"allstar50\"></span>\\n        <span class=\"rating_nums\">9.6</span>\\n\\n    <span class=\"pl\">\\n        (54人评价)\\n    </span>'),\n",
       " ('https://img9.doubanio.com/view/subject/s/public/s29877486.jpg',\n",
       "  'https://book.douban.com/subject/30333961/',\n",
       "  '图解深度学习与神经网络：从张量到TensorFlow实现',\n",
       "  '张平 / 电子工业出版社 / 2018-10 / 79.00元',\n",
       "  '<span class=\"pl\">\\n        (少于10人评价)\\n    </span>'),\n",
       " ('https://img3.doubanio.com/view/subject/s/public/s28070570.jpg',\n",
       "  'https://book.douban.com/subject/26388161/',\n",
       "  'MATLAB神经网络43个案例分析',\n",
       "  '王小川、史峰、郁磊、李洋 / 北京航空航天大学出版社 / 2013-8-1 / CNY 48.00',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">8.5</span>\\n\\n    <span class=\"pl\">\\n        (18人评价)\\n    </span>'),\n",
       " ('https://img3.doubanio.com/view/subject/s/public/s3898822.jpg',\n",
       "  'https://book.douban.com/subject/2584657/',\n",
       "  'Neural Networks and Learning Machines',\n",
       "  'Simon O. Haykin / Pearson / 2008-11-28 / USD 252.40',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">8.7</span>\\n\\n    <span class=\"pl\">\\n        (53人评价)\\n    </span>'),\n",
       " ('https://img9.doubanio.com/view/subject/s/public/s1695376.jpg',\n",
       "  'https://book.douban.com/subject/1138922/',\n",
       "  '神经网络原理(原书第2版)',\n",
       "  'Simon Haykin / 叶世伟、史忠植 / 机械工业出版社 / 2004-1 / 69.00元',\n",
       "  '<span class=\"allstar35\"></span>\\n        <span class=\"rating_nums\">7.3</span>\\n\\n    <span class=\"pl\">\\n        (54人评价)\\n    </span>'),\n",
       " ('https://img9.doubanio.com/view/subject/s/public/s28342396.jpg',\n",
       "  'https://book.douban.com/subject/26666358/',\n",
       "  '连接组：造就独一无二的你',\n",
       "  '[美] 承现峻 / 孙天齐 / 清华大学出版社 / 2016-1 / 45',\n",
       "  '<span class=\"allstar45\"></span>\\n        <span class=\"rating_nums\">8.5</span>\\n\\n    <span class=\"pl\">\\n        (285人评价)\\n    </span>'),\n",
       " ('https://img1.doubanio.com/view/subject/s/public/s6458908.jpg',\n",
       "  'https://book.douban.com/subject/3890040/',\n",
       "  '神经网络控制',\n",
       "  '徐丽娜 / 2009-7 / 28.00元',\n",
       "  '<span class=\"pl\">\\n        (少于10人评价)\\n    </span>'),\n",
       " ('https://img1.doubanio.com/view/subject/s/public/s28107307.jpg',\n",
       "  'https://book.douban.com/subject/26422529/',\n",
       "  'Neural Networks and Statistical Learning',\n",
       "  'Ke-Lin Du、M. N. S. Swamy / Springer / 2013-12-7 / USD 129.00',\n",
       "  '<span class=\"pl\">\\n        (少于10人评价)\\n    </span>'),\n",
       " ('https://img9.doubanio.com/view/subject/s/public/s1663944.jpg',\n",
       "  'https://book.douban.com/subject/1159821/',\n",
       "  '意识的宇宙',\n",
       "  '[美] 杰拉尔德·埃德尔曼、[美] 朱利欧·托诺尼 / 顾凡及 / 上海科学技术出版社 / 2004-1 / 27.00元',\n",
       "  '<span class=\"allstar40\"></span>\\n        <span class=\"rating_nums\">8.3</span>\\n\\n    <span class=\"pl\">\\n        (192人评价)\\n    </span>'),\n",
       " ('https://img1.doubanio.com/view/subject/s/public/s6517517.jpg',\n",
       "  'https://book.douban.com/subject/6529821/',\n",
       "  'Unsupervised Learning',\n",
       "  'A Bradford Book / 1999-6-11 / USD 40.00',\n",
       "  '<span class=\"pl\">\\n        (少于10人评价)\\n    </span>')]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Try the book pattern on the downloaded page. Groups per match: cover image URL,\n",
    "# book URL, title, publication info, raw rating HTML.\n",
    "# Raw string so the regex backslash escapes are not treated as (invalid) Python string escapes.\n",
    "re.findall(\n",
    "    r'src=\"(.*?\\d+.jpg)\"[\\S\\s]*?<a\\shref=\"(https:\\/\\/book.douban.com/subject\\/\\d+/)\"\\stitle=\"(.*?)\"[\\S\\s]*?<div class=\"pub\">\\s*(.*?)\\s*<\\/div>[\\S\\s]*?<div class=\"star\\sclearfix\">\\s*([\\S\\s]*?)\\s*<\\/div>',\n",
    "    response.text\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
